
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# DISCLAIMER AND GENERAL INFORMATION
#
# File: SI3_vignette_experiment_map.R
# Purpose: Produces results presented in section SI3
# Date: June 2025
# Data: pulled through 00_data_prep.R
#
# See 00_data_prep.R for technical disclaimer on R versions
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Load data
source("00_data_prep.R")

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# (A) Analysis of vignette experiment (Figure A2) ----
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Control variables shared by both regression models. Each variable is wrapped
# in factor() and the terms are joined with "+", so the resulting string can be
# appended directly to a model formula.
ctrls <- paste0(
  "factor(",
  c("female", "middleage", "old", "educ_cat", "pol_left", "pol_right"),
  ")",
  collapse = "+"
)

# Recode dependent variable into binary: 1 if cc_invol lies above its mean
# (computed over non-missing values), 0 otherwise.
# Respondents with a missing cc_invol are kept as NA instead of falling through
# to the default of 0; otherwise missing outcomes would silently enter the
# regressions below as valid zeros.
full <- full %>%
  mutate(
    invol_dummy = case_when(
      is.na(cc_invol) ~ NA_real_,
      cc_invol > mean(cc_invol, na.rm = TRUE) ~ 1,
      .default = 0
    )
  )

# Model 1: full data
# Linear regression of the binary outcome on treatment, sample and their
# interaction, plus the shared controls in `ctrls`. The formula is assembled
# as a string and coerced by lm().
exp1 <- lm(
  formula = paste(
    "invol_dummy~treatment+sample+treatment*sample",
    ctrls,
    sep = "+"
  ),
  data = full
)

# Marginal effect of treatment within each sample, with 95% confidence
# intervals. draw = FALSE is used so the estimates come back as data (they are
# row-bound into the plotting data later) rather than as a finished plot.
# The argument is spelled out as `variables` instead of relying on partial
# matching of `variable`.
me1 <- plot_slopes(
  exp1,
  variables = "treatment",
  by = "sample",
  conf_level = 0.95,
  draw = FALSE
)


# Model 2: general population data only
# Keep the Prolific respondents and assign each one to a region.
# case_when() uses the first matching branch, so the Yorkshire and Humber
# check takes precedence over the residence-based branches. Rows matching no
# branch get NA.
# NOTE(review): "Northern England" is defined here via "Yorkshire and Humber"
# but is later displayed as "Yorkshire and Cumbria" -- confirm the labelling.
genpop <- full %>%
  filter(survey == "Prolific") %>%
  mutate(
    region = case_when(
      gor_england == "Yorkshire and Humber" ~ "Northern England",
      residence == "Scotland" ~ "Scotland",
      residence == "Wales" ~ "Wales",
      residence == "England" & gor_england != "Yorkshire and Humber" ~
        "General population",
      .default = NA
    )
  ) %>%
  mutate(
    # Unordered factor; "General population" is the first (reference) level.
    region = factor(
      region,
      levels = c("General population", "Northern England", "Scotland", "Wales")
    )
  )

# Regression model: same specification as Model 1, but the treatment effect
# varies by region within the general population data.
exp2 <- lm(
  formula = paste(
    "invol_dummy~treatment+region+treatment*region",
    ctrls,
    sep = "+"
  ),
  data = genpop
)

# Marginal effect of treatment within each region, with 95% confidence
# intervals. draw = FALSE is used so the estimates come back as data rather
# than as a finished plot.
# The argument is spelled out as `variables` instead of relying on partial
# matching of `variable`.
me2 <- plot_slopes(
  exp2,
  variables = "treatment",
  by = "region",
  conf_level = 0.95,
  draw = FALSE
)

# Collect group sizes for the "n=" labels: one row per factor level, tagged
# with the data set it belongs to. levels() and table() list the levels in the
# same order, so names and counts line up.
# NOTE(review): these counts come from the raw data, not from the rows lm()
# actually used -- confirm that this is the intended n.
count <- bind_rows(
  tibble(
    sample = levels(full$sample),
    count = as.numeric(table(full$sample)),
    data = "full"
  ),
  tibble(
    sample = levels(genpop$region),
    count = as.numeric(table(genpop$region)),
    data = "genpop"
  )
)

# Create plotting data
# Stack the two sets of marginal effects. Rows from model 1 carry `sample`,
# rows from model 2 carry `region`, so a missing `sample` identifies model 2.
# `data` must be derived before `sample` is overwritten with the region.
dt <- bind_rows(me1, me2) %>%
  mutate(
    data = if_else(is.na(sample), "genpop", "full"),
    sample = case_when(
      is.na(sample) ~ region,
      .default = sample
    )
  ) %>%
  arrange(sample) %>%
  # right_join keeps every group listed in `count`, even one with no estimate.
  right_join(count, by = join_by("sample", "data"))

# Facet strip labels: maps each value of `data` to its display title
group.labs <- c(full = "Full data", genpop = "General population data")

# Change group labels
# Display "Northern England" as "Yorkshire and Cumbria" (missing values stay
# missing), then fix the level order used on the plot's y-axis.
dt <- dt %>%
  mutate(
    sample = if_else(
      sample == "Northern England",
      "Yorkshire and Cumbria",
      sample
    ),
    sample = factor(
      sample,
      levels = c(
        "General population",
        "Yorkshire and Cumbria",
        "Scotland",
        "Wales"
      )
    )
  ) %>%
  as_tibble()


# Create results plot: point estimates with horizontal confidence intervals,
# one facet per data set, each group annotated with its size.
p <- ggplot(dt, aes(x = estimate, y = sample, group = data)) +
  # Shaded band over y positions 0.5-1.5, i.e. the first level of `sample`.
  # NOTE(review): with data = NULL the layer inherits `dt`, so the rectangle
  # is presumably drawn once per row and the shading stacks -- confirm the
  # resulting opacity is intended.
  geom_rect(
    data = NULL,
    mapping = aes(xmin = -Inf, xmax = Inf, ymin = 0.5, ymax = 1.5),
    fill = cols[1],
    colour = NA,
    alpha = 0.1
  ) +
  geom_point() +
  # Reference line at a zero effect.
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_linerangeh(mapping = aes(xmin = conf.low, xmax = conf.high)) +
  geom_text(
    mapping = aes(label = paste0("n=", count), group = data),
    position = position_dodge(width = 0.8),
    vjust = -1,
    size = 3
  ) +
  facet_wrap(~data, labeller = labeller(data = group.labs)) +
  labs(x = "Marginal effect of information vignette", y = "") +
  # NOTE(review): no layer maps a colour aesthetic, so this scale looks inert.
  scale_color_manual(values = c("black", cols[2]), na.value = "black") +
  theme_classic() +
  theme(legend.position = "none")
p

# Save the figure as a PDF. The output directory is created first if it does
# not exist, so the script also runs from a fresh checkout. Arguments are named
# explicitly rather than relying on `file` partially matching `filename` and
# on the plot being picked up positionally.
dir.create("./plots", showWarnings = FALSE, recursive = TRUE)
ggsave(filename = "./plots/SI3_vignette.pdf", plot = p, width = 6, height = 4)



# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#                         END OF FILE
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


